Check that this is not simply due to the central limit theorem (using artificial uniform distributions).
To explore the fossil sampling heterogeneity only keep occurrences within the species range.
Functional data
##
## FALSE TRUE
## 2 368
## [1] "Globoconella miotumida" "Neogloboquadrina atlantica"
##
## FALSE TRUE
## 85 368
Functional groups are only defined for Macroperforate foraminifera. Add an additional morphogroup for Microperforate foraminifera.
##
## Call:
## lm(formula = sampling_rate ~ relevel(morphogroup, ref = "2"),
## data = sampling_heterogeneity)
##
## Residuals:
## Min 1Q Median 3Q Max
## -33.268 -15.546 -5.154 10.773 127.271
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 23.1002 2.4715 9.347
## relevel(morphogroup, ref = "2")NA -11.4790 5.5570 -2.066
## relevel(morphogroup, ref = "2")1 -19.4626 9.7615 -1.994
## relevel(morphogroup, ref = "2")3 5.2506 4.7563 1.104
## relevel(morphogroup, ref = "2")4 11.0408 7.4600 1.480
## relevel(morphogroup, ref = "2")5 -16.6040 10.8435 -1.531
## relevel(morphogroup, ref = "2")6 -12.8679 9.7615 -1.318
## relevel(morphogroup, ref = "2")7 9.1462 3.9751 2.301
## relevel(morphogroup, ref = "2")8 -20.2430 21.2603 -0.952
## relevel(morphogroup, ref = "2")9 -5.1970 9.7615 -0.532
## relevel(morphogroup, ref = "2")10 -16.1625 8.3550 -1.934
## relevel(morphogroup, ref = "2")11 -21.5617 21.2603 -1.014
## relevel(morphogroup, ref = "2")12 5.4326 7.8641 0.691
## relevel(morphogroup, ref = "2")13 -0.8358 5.8289 -0.143
## relevel(morphogroup, ref = "2")14 0.8393 5.1357 0.163
## relevel(morphogroup, ref = "2")15 -1.8244 6.5777 -0.277
## relevel(morphogroup, ref = "2")16 -2.1235 7.4600 -0.285
## relevel(morphogroup, ref = "2")17 1.7028 4.3844 0.388
## relevel(morphogroup, ref = "2")18 7.2789 6.5777 1.107
## relevel(morphogroup, ref = "2")19 4.3778 7.8641 0.557
## relevel(morphogroup, ref = "2")Microperforate -5.9270 3.9240 -1.510
## Pr(>|t|)
## (Intercept) <2e-16 ***
## relevel(morphogroup, ref = "2")NA 0.0396 *
## relevel(morphogroup, ref = "2")1 0.0469 *
## relevel(morphogroup, ref = "2")3 0.2704
## relevel(morphogroup, ref = "2")4 0.1398
## relevel(morphogroup, ref = "2")5 0.1266
## relevel(morphogroup, ref = "2")6 0.1883
## relevel(morphogroup, ref = "2")7 0.0220 *
## relevel(morphogroup, ref = "2")8 0.3417
## relevel(morphogroup, ref = "2")9 0.5948
## relevel(morphogroup, ref = "2")10 0.0539 .
## relevel(morphogroup, ref = "2")11 0.3112
## relevel(morphogroup, ref = "2")12 0.4901
## relevel(morphogroup, ref = "2")13 0.8861
## relevel(morphogroup, ref = "2")14 0.8703
## relevel(morphogroup, ref = "2")15 0.7817
## relevel(morphogroup, ref = "2")16 0.7761
## relevel(morphogroup, ref = "2")17 0.6980
## relevel(morphogroup, ref = "2")18 0.2692
## relevel(morphogroup, ref = "2")19 0.5781
## relevel(morphogroup, ref = "2")Microperforate 0.1318
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 21.12 on 349 degrees of freedom
## Multiple R-squared: 0.1018, Adjusted R-squared: 0.05031
## F-statistic: 1.977 on 20 and 349 DF, p-value: 0.007872
##
## Call:
## lm(formula = sampling_rate ~ relevel(ecogroup, ref = "1"), data = sampling_heterogeneity)
##
## Residuals:
## Min 1Q Median 3Q Max
## -28.442 -15.265 -6.601 13.137 128.785
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 28.262 2.062 13.705 < 2e-16 ***
## relevel(ecogroup, ref = "1")NA -12.603 3.319 -3.797 0.000172 ***
## relevel(ecogroup, ref = "1")2 -7.935 4.000 -1.984 0.048059 *
## relevel(ecogroup, ref = "1")3 -2.153 2.984 -0.722 0.470994
## relevel(ecogroup, ref = "1")4 -10.284 3.477 -2.958 0.003299 **
## relevel(ecogroup, ref = "1")5 11.775 10.765 1.094 0.274774
## relevel(ecogroup, ref = "1")6 -20.267 10.765 -1.883 0.060538 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 21.13 on 363 degrees of freedom
## Multiple R-squared: 0.06442, Adjusted R-squared: 0.04896
## F-statistic: 4.166 on 6 and 363 DF, p-value: 0.0004588
Correlations between categorical factors
## quartz_off_screen
## 2
-> A Gamma family seems to be a better fit
##
## Call:
## glm(formula = sampling_rate ~ rel.abun + abs(pal.lat) + pal.long +
## age + sample.depth + pal.absLat.var + pal.long.var, family = Gamma(link = "log"),
## data = sampling_heterogeneity[sampling_heterogeneity$rel.abun <
## 25, ])
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.1408310 0.1764896 6.464 3.52e-10 ***
## rel.abun 0.1113350 0.0124575 8.937 < 2e-16 ***
## abs(pal.lat) -0.0054140 0.0029380 -1.843 0.066232 .
## pal.long -0.0009055 0.0005529 -1.638 0.102388
## age 0.0064433 0.0030998 2.079 0.038398 *
## sample.depth -0.0016615 0.0004803 -3.460 0.000609 ***
## pal.absLat.var -0.0008867 0.0004294 -2.065 0.039667 *
## pal.long.var 2.2019945 0.2012628 10.941 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Gamma family taken to be 0.7073319)
##
## Null deviance: 427.27 on 349 degrees of freedom
## Residual deviance: 279.56 on 342 degrees of freedom
## (16 observations deleted due to missingness)
## AIC: 2778.3
##
## Number of Fisher Scoring iterations: 8
##
## Call:
## glm(formula = sampling_rate ~ rel.abun + abs(pal.lat) + pal.long +
## age + sample.depth + pal.absLat.var + pal.long.var + reason +
## age.calc + morphogroup + ecogroup, family = Gamma(link = "log"),
## data = sampling_heterogeneity[sampling_heterogeneity$rel.abun <
## 25, ])
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.1415025 0.2428189 4.701 3.90e-06 ***
## rel.abun 0.1105284 0.0122961 8.989 < 2e-16 ***
## abs(pal.lat) -0.0052771 0.0027609 -1.911 0.0569 .
## pal.long -0.0006712 0.0004945 -1.357 0.1757
## age 0.0052764 0.0034019 1.551 0.1219
## sample.depth -0.0021961 0.0004482 -4.900 1.55e-06 ***
## pal.absLat.var -0.0003828 0.0004140 -0.925 0.3558
## pal.long.var 1.7322116 0.2066426 8.383 1.82e-15 ***
## reasonBiostratigraphy 0.1944948 0.1049871 1.853 0.0649 .
## reasonCommunity analysis -0.1901695 0.2155368 -0.882 0.3783
## reasonSelected species -0.0562383 0.2641076 -0.213 0.8315
## age.calcModel -0.6644389 0.1217975 -5.455 9.99e-08 ***
## age.calcZone -0.9217861 0.3822318 -2.412 0.0165 *
## age.calcInterp -2.0249430 0.3092892 -6.547 2.42e-10 ***
## age.calcMagneto -3.4595271 0.4652247 -7.436 1.01e-12 ***
## morphogroup1 -1.2810255 0.5806592 -2.206 0.0281 *
## morphogroup2 -0.4635724 0.4335480 -1.069 0.2858
## morphogroup3 0.0146053 0.4903191 0.030 0.9763
## morphogroup4 0.4160399 0.5228461 0.796 0.4268
## morphogroup5 -1.0172240 0.6290322 -1.617 0.1069
## morphogroup6 -0.4521186 0.5401644 -0.837 0.4032
## morphogroup7 -0.3567651 0.4539062 -0.786 0.4325
## morphogroup8 -0.6421687 0.8720130 -0.736 0.4620
## morphogroup9 -0.3533097 0.5913296 -0.597 0.5506
## morphogroup10 -1.2914789 0.5412941 -2.386 0.0176 *
## morphogroup12 0.0121785 0.5208390 0.023 0.9814
## morphogroup13 -0.1558879 0.4863852 -0.321 0.7488
## morphogroup14 0.0157533 0.4654325 0.034 0.9730
## morphogroup15 -0.0388340 0.4966204 -0.078 0.9377
## morphogroup16 -0.6133349 0.5036000 -1.218 0.2242
## morphogroup17 -0.0991667 0.4926373 -0.201 0.8406
## morphogroup18 -0.1142061 0.5365418 -0.213 0.8316
## morphogroup19 -0.3300354 0.5495653 -0.601 0.5486
## morphogroupMicroperforate 0.3641262 0.2241514 1.624 0.1053
## ecogroup1 0.6459100 0.4243819 1.522 0.1290
## ecogroup2 0.9589251 0.4176330 2.296 0.0223 *
## ecogroup3 0.6903083 0.4022405 1.716 0.0871 .
## ecogroup4 0.5516943 0.4060506 1.359 0.1752
## ecogroup5 1.1783699 0.5702967 2.066 0.0396 *
## ecogroup6 NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Gamma family taken to be 0.5355358)
##
## Null deviance: 427.27 on 349 degrees of freedom
## Residual deviance: 199.89 on 311 degrees of freedom
## (16 observations deleted due to missingness)
## AIC: 2710.2
##
## Number of Fisher Scoring iterations: 11
## Model matrix is rank deficient. Parameters `ecogroup6` were not
## estimable.
##
## Family: Gamma
## Link function: log
##
## Formula:
## sampling_rate ~ s(pal.lat, pal.long, bs = "sos", k = 40) + s(rel.abun,
## k = 3) + s(pal.absLat.var, k = 3) + s(pal.long.var, k = 3) +
## te(age, sample.depth, k = 4) + reason + age.calc + morphogroup +
## ecogroup
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.53239 0.18784 13.482 < 2e-16 ***
## reasonBiostratigraphy 0.24683 0.11040 2.236 0.0261 *
## reasonCommunity analysis -0.09319 0.22284 -0.418 0.6761
## reasonSelected species -0.03980 0.25572 -0.156 0.8764
## age.calcModel -0.71605 0.11946 -5.994 6.02e-09 ***
## age.calcZone -0.90454 0.36558 -2.474 0.0139 *
## age.calcInterp -1.87800 0.30506 -6.156 2.46e-09 ***
## age.calcMagneto -3.41394 0.45613 -7.485 8.53e-13 ***
## morphogroup1 -1.01378 0.80266 -1.263 0.2076
## morphogroup2 -0.33954 0.72922 -0.466 0.6418
## morphogroup3 0.01080 0.75501 0.014 0.9886
## morphogroup4 0.30426 0.77339 0.393 0.6943
## morphogroup5 -0.73563 0.83841 -0.877 0.3810
## morphogroup6 -0.34392 0.83590 -0.411 0.6811
## morphogroup7 -0.17911 0.72863 -0.246 0.8060
## morphogroup8 0.00000 0.00000 NaN NaN
## morphogroup9 -0.56360 0.81571 -0.691 0.4902
## morphogroup10 -1.30969 0.78398 -1.671 0.0959 .
## morphogroup12 0.33560 0.75950 0.442 0.6589
## morphogroup13 -0.07910 0.74469 -0.106 0.9155
## morphogroup14 0.15824 0.72598 0.218 0.8276
## morphogroup15 0.19555 0.75417 0.259 0.7956
## morphogroup16 -0.32805 0.75809 -0.433 0.6655
## morphogroup17 -0.15920 0.75459 -0.211 0.8331
## morphogroup18 -0.07157 0.77372 -0.093 0.9264
## morphogroup19 -0.33954 0.78922 -0.430 0.6673
## morphogroupMicroperforate 0.50803 0.21960 2.313 0.0214 *
## ecogroup1 0.60117 0.75446 0.797 0.4262
## ecogroup2 0.85344 0.75341 1.133 0.2582
## ecogroup3 0.61417 0.73598 0.834 0.4047
## ecogroup4 0.40941 0.75028 0.546 0.5857
## ecogroup5 0.89907 0.83381 1.078 0.2818
## ecogroup6 -0.06011 0.83734 -0.072 0.9428
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(pal.lat,pal.long) 1.796e+01 39 1.538 1.05e-06 ***
## s(rel.abun) 1.856e+00 2 29.043 < 2e-16 ***
## s(pal.absLat.var) 4.451e-04 2 0.000 0.496
## s(pal.long.var) 9.835e-01 2 31.308 < 2e-16 ***
## te(age,sample.depth) 5.449e+00 15 1.848 5.70e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Rank: 92/93
## R-sq.(adj) = 0.332 Deviance explained = 62.2%
## -REML = 1338.7 Scale est. = 0.46559 n = 350
## quartz_off_screen
## 2
## quartz_off_screen
## 2
## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Set1 is 9
## Returning the palette you asked for with that many colors
## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Set1 is 9
## Returning the palette you asked for with that many colors
## Warning in AIC.default(sampling_heterogeneity_GLM,
## sampling_heterogeneity_GLM_simplified, : models are not all fitted to the same
## number of observations
## Warning in BIC.default(sampling_heterogeneity_GLM,
## sampling_heterogeneity_GLM_simplified, : models are not all fitted to the same
## number of observations
Derive the respective contribution of all the predictors to the explained variance.
## $Explained.deviance
## [1] 0.6223479
##
## $hierarchical.partitioning
## Unique Average.share Individual I.perc(%)
## s(pal.lat,pal.long,bs="sos",k=40) 0.0629 0.0624 0.1253 20.13
## s(rel.abun,k=3) 0.0438 0.0480 0.0918 14.75
## s(pal.absLat.var,k=3) 0.0000 0.0231 0.0231 3.71
## s(pal.long.var,k=3) 0.0525 0.0524 0.1049 16.86
## te(age,sample.depth,k=4) 0.0176 0.0495 0.0671 10.78
## reason 0.0117 -0.0010 0.0107 1.72
## age.calc 0.0922 0.0366 0.1288 20.70
## morphogroup 0.0376 0.0145 0.0521 8.37
## ecogroup 0.0071 0.0114 0.0185 2.97
##
## $variables
## [1] "s(pal.lat,pal.long,bs=\"sos\",k=40)" "s(rel.abun,k=3)"
## [3] "s(pal.absLat.var,k=3)" "s(pal.long.var,k=3)"
## [5] "te(age,sample.depth,k=4)" "reason"
## [7] "age.calc" "morphogroup"
## [9] "ecogroup"
##
## $type
## [1] "hierarchical.partitioning"
##
## attr(,"class")
## [1] "gamhp"